MATH 318 Final Project: The Effect of World Cup on Stock Price and Trading Activity

library(tidyverse)
library(dplyr)
library(MASS)
library(lubridate)
library(stringr)
library(GGally)
library(knitr)
library(kableExtra)

Load Data

# Load the SPY price file and the World Cup match file, each as a tibble.
spydata <- as_tibble(read.csv("1_min_SPY_2008-2021.csv"))
worldcupmatches <- as_tibble(read.csv("WorldCupMatches.csv"))

Helper Functions

Get Only Spy Data for a specific Game

# Keep only the rows of `spydata` whose `date` lies within `range` hours of
# `game_date` (both ends of the window are inclusive).
getSpyDataWithinRangeofGame = function(spydata, game_date, range) {
  window_half_width = range * 60 * 60
  window_start = game_date - window_half_width
  window_end = game_date + window_half_width
  filter(spydata, date >= window_start, date <= window_end)
}

Check if there is Spy data for a game

# For each kickoff time in `game_dates`, report whether `spydata` has at least
# one row within `range` hours of it.
#   spydata    : SPY data, passed through to getSpyDataWithinRangeofGame()
#   game_dates : vector of kickoff date-times
#   range      : half-width of the window, in hours
# Returns a logical vector with one entry per element of `game_dates`; an empty
# `game_dates` yields logical(0) rather than looping over 1:0.
hasSpyDataWithinRangeOfGame = function(spydata, game_dates, range){
  vapply(
    seq_along(game_dates),
    function(i) {
      # Subset with `[i]` so the date-time class of `game_dates` is preserved.
      nrow(getSpyDataWithinRangeofGame(spydata, game_date = game_dates[i], range)) != 0
    },
    logical(1)
  )
}

Get Spy Data combined with Game Data for a set of Games

# Build one table holding, for every game in `worldcup`, the SPY rows within
# `range` hours of kickoff, each annotated with that game's columns and with
# `time.from.game` (seconds from kickoff).
#   worldcup : table of games; must contain a `Datetime` column
#   spydata  : SPY data with a `date` column
#   range    : half-width of the window around kickoff, in hours
# Returns the per-game tables stacked in game order.
# Stops with an error when `worldcup` has no rows.
getSpyAndGameDataWithinWorldcup = function(worldcup, spydata, range){
  game_count = nrow(worldcup)
  if (game_count == 0) {
    stop("`worldcup` has no games to collect SPY data for.", call. = FALSE)
  }
  # seq_len() handles a single game correctly; `2:nrow(worldcup)` would count
  # down to c(2, 1) and index a game that does not exist.
  per_game = lapply(seq_len(game_count), function(gameIndx) {
    getSpyAndGameDataForOneGame(spydata, worldcup, gameIndx, range)
  })
  Reduce(union_all, per_game)
}

Get Spy Data combined with Game Data for a single game

# Return the SPY rows within `range` hours of game number `game_index` in
# `worldcup`. Every column of that game's row is repeated onto each SPY row, and
# `time.from.game` holds the seconds from kickoff to the row's `date`
# (negative before kickoff).
getSpyAndGameDataForOneGame = function(spydata, worldcup, game_index, range) {
  kickoff = worldcup[[game_index, "Datetime"]]
  game_window = getSpyDataWithinRangeofGame(spydata, kickoff, range)
  game_columns = colnames(worldcup)
  for (col_position in seq_len(ncol(worldcup))) {
    game_window[game_columns[col_position]] = rep(
      worldcup[[game_index, col_position]],
      times = nrow(game_window)
    )
  }
  seconds_from_kickoff = difftime(
    game_window$date,
    rep(kickoff, times = nrow(game_window)),
    units = "secs"
  )
  game_window["time.from.game"] = as.numeric(seconds_from_kickoff)
  game_window
}

Cleaning Data

# Parse the SPY timestamps (format "%Y%m%d %H:%M:%S") into POSIXct.
spydata$date = as.POSIXct(spydata$date, format="%Y%m%d %H:%M:%S")

# Drop rows in which every field is NA or "", then drop exact duplicate rows.
cleaned_worldcupmatches = unique(worldcupmatches[!apply(is.na(worldcupmatches) | worldcupmatches == "", 1, all),])

# Parse the kickoff time and shift it back by one hour.
# NOTE(review): the one-hour shift presumably converts the venue's local time to
# EDT; that offset would differ between host countries - confirm for 2010.
cleaned_worldcupmatches$Datetime = as.POSIXct(cleaned_worldcupmatches$Datetime, format = "%e %b %Y - %R") - 60 * 60

# Filter out games on the weekend (lubridate: 1 = Sunday, 7 = Saturday).
# The weekday is taken from the date-time itself; as.Date() on a POSIXct
# converts to UTC first, which can move an evening kickoff onto the next day.
cleaned_worldcupmatches = filter(cleaned_worldcupmatches, wday(Datetime) != 7 & wday(Datetime) != 1)

# Keep only games that have at least one SPY row within 3 hours of kickoff.
cleaned_worldcupmatches = add_column(cleaned_worldcupmatches,"HasSpyData" = hasSpyDataWithinRangeOfGame(spydata, cleaned_worldcupmatches$Datetime, 3))
cleaned_worldcupmatches = filter(cleaned_worldcupmatches, HasSpyData == TRUE)

cleaned_worldcupmatches

Get Spy Data

# Collect the SPY rows within 3 hours of each game, one table per World Cup.
allspydata2014 <- getSpyAndGameDataWithinWorldcup(
  filter(cleaned_worldcupmatches, Year == 2014),
  spydata,
  3
)

allspydata2010 <- getSpyAndGameDataWithinWorldcup(
  filter(cleaned_worldcupmatches, Year == 2010),
  spydata,
  3
)

Normalizing the average price

# Min-max scale `average` within each World Cup so price movement can be
# compared across the two tournaments on a common 0-1 scale.

# 2014
min.2014 <- min(allspydata2014$average)
max.2014 <- max(allspydata2014$average)
difference.2014 <- max.2014 - min.2014
normalized2014average <- (allspydata2014$average - min.2014) / difference.2014
allspydata2014 <- add_column(allspydata2014, "normalized.average" = normalized2014average)

# 2010
min.2010 <- min(allspydata2010$average)
max.2010 <- max(allspydata2010$average)
difference.2010 <- max.2010 - min.2010
normalized2010average <- (allspydata2010$average - min.2010) / difference.2010
allspydata2010 <- add_column(allspydata2010, "normalized.average" = normalized2010average)

# Stack both tournaments (2014 rows first).
allspydata <- union_all(allspydata2014, allspydata2010)

allspydata
NA

Exploring the Relationship of Time and Volume

Let us explore the correlation between the time from the game and the price of the stock.

Correlations

# Pairwise plots and correlation table for the four variables of interest.
# Columns are selected by name so the table does not silently change if the
# column order of `allspydata` changes.
correlation_columns <- c("volume", "average", "time.from.game", "normalized.average")

ggpairs(allspydata, columns = c("time.from.game", "average", "volume", "normalized.average"))

kable(cor(allspydata[, correlation_columns]))
volume average time.from.game normalized.average
volume 1.0000000 -0.4039908 0.0334462 0.1264736
average -0.4039908 1.0000000 0.2149680 -0.3961910
time.from.game 0.0334462 0.2149680 1.0000000 0.0158785
normalized.average 0.1264736 -0.3961910 0.0158785 1.0000000

Linear Regression

# Normalized price against seconds from kickoff.
ggplot(allspydata) +
  geom_point(aes(time.from.game, normalized.average))

# Log volume against seconds from kickoff.
ggplot(allspydata) +
  geom_point(aes(time.from.game, log(volume)))

# Degree-4 polynomial fit of log volume on time from kickoff.
polymodel <- lm(log(volume) ~ poly(time.from.game, 4), data = allspydata)
polymodel

Call:
lm(formula = log(volume) ~ poly(time.from.game, 4), data = allspydata)

Coefficients:
             (Intercept)  poly(time.from.game, 4)1  poly(time.from.game, 4)2  poly(time.from.game, 4)3  poly(time.from.game, 4)4  
                  7.4268                   -0.9477                    6.4989                   22.3316                    9.7577  
# Log volume against seconds from kickoff, with ggplot2's default smoother.
ggplot(allspydata, aes(time.from.game, log(volume))) +
  geom_point() +
  geom_smooth()

Sample Games from a World Cup

# Fix the RNG seed so the same games are drawn on every run, then sample
# `samplesize` of the 2014 games.
set.seed(100)
samplesize <- 10
worldcupmatches2014 <- filter(cleaned_worldcupmatches, Year == 2014)
sampleworldcupgames <- sample_n(worldcupmatches2014, size = samplesize)

print(worldcupmatches2014)
print(sampleworldcupgames)

After

# Log volume around kickoff for the first five sampled games; the vertical
# line marks the kickoff time.
game1 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 1, 3)
volumesp1 <- ggplot(game1) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[1]) +
  ggtitle("Volume over Time, Sample Game 1")
volumesp1

game2 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 2, 3)
volumesp2 <- ggplot(game2) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[2]) +
  ggtitle("Volume over Time, Sample Game 2")
volumesp2

# Error again: we don't have full data for this game.
game3 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 3, 3)
volumesp3 <- ggplot(game3) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[3]) +
  ggtitle("Volume over Time, Sample Game 3")
volumesp3

game4 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 4, 3)
volumesp4 <- ggplot(game4) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[4]) +
  ggtitle("Volume over Time, Sample Game 4")
volumesp4

game5 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 5, 3)
volumesp5 <- ggplot(game5) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[5]) +
  ggtitle("Volume over Time, Sample Game 5")
volumesp5

Price Scatter Plots

Using average price

# Average price around kickoff for the first five sampled games; the vertical
# line marks the kickoff time.
pricesp1 <- ggplot(game1) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[1]) +
  ggtitle("Price over Time, Sample Game 1")
pricesp1

pricesp2 <- ggplot(game2) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[2]) +
  ggtitle("Price over Time, Sample Game 2")
pricesp2

# Issue: we don't have data for 16:00, and that is the time of the game.
pricesp3 <- ggplot(game3) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[3]) +
  ggtitle("Price over Time, Sample Game 3")
pricesp3

pricesp4 <- ggplot(game4) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[4]) +
  ggtitle("Price over Time, Sample Game 4")
pricesp4

# Price points and volume bars share one y scale here.
# NOTE(review): the secondary axis is labelled "average" but shows the primary
# axis divided by 100 - confirm this is the intended dual-axis scaling.
pricesp5 <- ggplot(game5, aes(date)) +
  geom_point(aes(y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[5]) +
  ggtitle("Price over Time, Sample Game 5") +
  geom_col(aes(y = (volume))) +
  scale_y_continuous(sec.axis = sec_axis(~./100, name = "average"))
pricesp5

Just USA Games

# Games in which the USA is listed as home or away team; the away games are
# stacked first, which fixes the game numbering used below.
worldcupmatchesUSAhome <- filter(cleaned_worldcupmatches, Home.Team.Name == "USA")
worldcupmatchesUSAaway <- filter(cleaned_worldcupmatches, Away.Team.Name == "USA")
worldcupmatchesUSA <- union_all(worldcupmatchesUSAaway, worldcupmatchesUSAhome)
worldcupmatchesUSA

game1USA <- getSpyAndGameDataForOneGame(spydata, worldcupmatchesUSA, 1, 3)
volumesp1USA <- ggplot(game1USA) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[1]) +
  ggtitle("Volume over Time, USA Game 1, 2010")
volumesp1USA

game2USA <- getSpyAndGameDataForOneGame(spydata, worldcupmatchesUSA, 2, 3)
volumesp2USA <- ggplot(game2USA) +
  geom_point(mapping = aes(x = date, y = volume), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[2]) +
  ggtitle("Volume over Time, USA Game 2, 2014")
volumesp2USA

game3USA <- getSpyAndGameDataForOneGame(spydata, worldcupmatchesUSA, 3, 3)
volumesp3USA <- ggplot(game3USA) +
  geom_point(mapping = aes(x = date, y = volume), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[3]) +
  ggtitle("Volume over Time, USA Game 2, 2010")
volumesp3USA

game4USA <- getSpyAndGameDataForOneGame(spydata, worldcupmatchesUSA, 4, 3)
volumesp4USA <- ggplot(game4USA) +
  geom_point(mapping = aes(x = date, y = volume), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[4]) +
  ggtitle("Volume over Time, USA Game 1, 2014")
volumesp4USA

pricesp4USA <- ggplot(game4USA) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[4]) +
  ggtitle("Price over Time, USA Game 1, 2014")
pricesp4USA

What time should you look to sell?

# For every cleaned game, find the SPY row with the highest `average` within
# 3 hours of kickoff and record that price and its `time.from.game`.
price_list = c()
time_list = c()
# seq_len() skips the loop entirely when there are no games (1:0 would not).
for (game in seq_len(nrow(cleaned_worldcupmatches))) {
  price_game_data <- getSpyAndGameDataForOneGame(spydata, cleaned_worldcupmatches, game, 3)
  highestdatapoint = price_game_data[which.max(price_game_data$average),]
  highestpriceforgame <- highestdatapoint$average
  # Named `time_of_high` so the base function `time` is not shadowed.
  time_of_high <- highestdatapoint[["time.from.game"]]
  price_list = append(price_list, highestpriceforgame)
  time_list = append(time_list, time_of_high)
}
optimal_selling.df <- data.frame("price" = price_list, "time" = time_list)
optimal_selling.df

Do this for the optimal buying time?

# TODO: do this for the optimal time to buy?
# Histogram of when (relative to kickoff) each game's highest price occurred.
ggplot(optimal_selling.df) +
  geom_histogram(mapping = aes(x = time)) +
  ggtitle("How Often Best Selling Time is vs. Time From Game")

What time should you look to buy?

# For every cleaned game, find the SPY row with the lowest `average` within
# 3 hours of kickoff and record that price and its `time.from.game`.
price_list = c()
time_list = c()
# seq_len() skips the loop entirely when there are no games (1:0 would not).
for (game in seq_len(nrow(cleaned_worldcupmatches))) {
  price_game_data <- getSpyAndGameDataForOneGame(spydata, cleaned_worldcupmatches, game, 3)
  lowestdatapoint = price_game_data[which.min(price_game_data$average),]
  lowestpriceforgame <- lowestdatapoint$average
  # Named `time_of_low` so the base function `time` is not shadowed.
  time_of_low <- lowestdatapoint[["time.from.game"]]
  price_list = append(price_list, lowestpriceforgame)
  time_list = append(time_list, time_of_low)
}
# The histogram below needs this data frame; it was never created here.
optimal_buying.df <- data.frame("price" = price_list, "time" = time_list)
ggplot(optimal_buying.df) + geom_histogram(mapping = aes(x= time)) + ggtitle("How Often Best Buying Time is vs. Time From Game")

---
title: "MATH 318 Final Project"
output: html_notebook
---

# MATH 318 Final Project: The Effect of World Cup on Stock Price and Trading Activity

```{r}
library(tidyverse)
library(dplyr)
library(MASS)
library(lubridate)
library(stringr)
library(GGally)
library(knitr)
library(kableExtra)
```

# Load Data

```{r}
# Load the SPY price file as a tibble.
spydata <- as_tibble(read.csv("1_min_SPY_2008-2021.csv"))
```

```{r}
# Load the World Cup match file as a tibble.
worldcupmatches <- as_tibble(read.csv("WorldCupMatches.csv"))
```

# Helper Functions

## Get Only Spy Data for a specific Game

```{r}
# Keep only the rows of `spydata` whose `date` lies within `range` hours of
# `game_date` (both ends of the window are inclusive).
getSpyDataWithinRangeofGame = function(spydata, game_date, range) {
  window_half_width = range * 60 * 60
  window_start = game_date - window_half_width
  window_end = game_date + window_half_width
  filter(spydata, date >= window_start, date <= window_end)
}
```

## Check if there is Spy data for a game

```{r}
# For each kickoff time in `game_dates`, report whether `spydata` has at least
# one row within `range` hours of it.
#   spydata    : SPY data, passed through to getSpyDataWithinRangeofGame()
#   game_dates : vector of kickoff date-times
#   range      : half-width of the window, in hours
# Returns a logical vector with one entry per element of `game_dates`; an empty
# `game_dates` yields logical(0) rather than looping over 1:0.
hasSpyDataWithinRangeOfGame = function(spydata, game_dates, range){
  vapply(
    seq_along(game_dates),
    function(i) {
      # Subset with `[i]` so the date-time class of `game_dates` is preserved.
      nrow(getSpyDataWithinRangeofGame(spydata, game_date = game_dates[i], range)) != 0
    },
    logical(1)
  )
}
```

## Get Spy Data combined with Game Data for a set of Games

```{r}
# Build one table holding, for every game in `worldcup`, the SPY rows within
# `range` hours of kickoff, each annotated with that game's columns and with
# `time.from.game` (seconds from kickoff).
#   worldcup : table of games; must contain a `Datetime` column
#   spydata  : SPY data with a `date` column
#   range    : half-width of the window around kickoff, in hours
# Returns the per-game tables stacked in game order.
# Stops with an error when `worldcup` has no rows.
getSpyAndGameDataWithinWorldcup = function(worldcup, spydata, range){
  game_count = nrow(worldcup)
  if (game_count == 0) {
    stop("`worldcup` has no games to collect SPY data for.", call. = FALSE)
  }
  # seq_len() handles a single game correctly; `2:nrow(worldcup)` would count
  # down to c(2, 1) and index a game that does not exist.
  per_game = lapply(seq_len(game_count), function(gameIndx) {
    getSpyAndGameDataForOneGame(spydata, worldcup, gameIndx, range)
  })
  Reduce(union_all, per_game)
}
```

## Get Spy Data combined with Game Data for a single game

```{r}
# Return the SPY rows within `range` hours of game number `game_index` in
# `worldcup`. Every column of that game's row is repeated onto each SPY row, and
# `time.from.game` holds the seconds from kickoff to the row's `date`
# (negative before kickoff).
getSpyAndGameDataForOneGame = function(spydata, worldcup, game_index, range) {
  kickoff = worldcup[[game_index, "Datetime"]]
  game_window = getSpyDataWithinRangeofGame(spydata, kickoff, range)
  game_columns = colnames(worldcup)
  for (col_position in seq_len(ncol(worldcup))) {
    game_window[game_columns[col_position]] = rep(
      worldcup[[game_index, col_position]],
      times = nrow(game_window)
    )
  }
  seconds_from_kickoff = difftime(
    game_window$date,
    rep(kickoff, times = nrow(game_window)),
    units = "secs"
  )
  game_window["time.from.game"] = as.numeric(seconds_from_kickoff)
  game_window
}
```

# Cleaning Data

```{r}
# Parse the SPY timestamps (format "%Y%m%d %H:%M:%S") into POSIXct.
spydata$date = as.POSIXct(spydata$date, format="%Y%m%d %H:%M:%S")

# Drop rows in which every field is NA or "", then drop exact duplicate rows.
cleaned_worldcupmatches = unique(worldcupmatches[!apply(is.na(worldcupmatches) | worldcupmatches == "", 1, all),])

# Parse the kickoff time and shift it back by one hour.
# NOTE(review): the one-hour shift presumably converts the venue's local time to
# EDT; that offset would differ between host countries - confirm for 2010.
cleaned_worldcupmatches$Datetime = as.POSIXct(cleaned_worldcupmatches$Datetime, format = "%e %b %Y - %R") - 60 * 60

# Filter out games on the weekend (lubridate: 1 = Sunday, 7 = Saturday).
# The weekday is taken from the date-time itself; as.Date() on a POSIXct
# converts to UTC first, which can move an evening kickoff onto the next day.
cleaned_worldcupmatches = filter(cleaned_worldcupmatches, wday(Datetime) != 7 & wday(Datetime) != 1)

# Keep only games that have at least one SPY row within 3 hours of kickoff.
cleaned_worldcupmatches = add_column(cleaned_worldcupmatches,"HasSpyData" = hasSpyDataWithinRangeOfGame(spydata, cleaned_worldcupmatches$Datetime, 3))
cleaned_worldcupmatches = filter(cleaned_worldcupmatches, HasSpyData == TRUE)

cleaned_worldcupmatches
```

## Get Spy Data

```{r}
# Collect the SPY rows within 3 hours of each game, one table per World Cup.
allspydata2014 <- getSpyAndGameDataWithinWorldcup(
  filter(cleaned_worldcupmatches, Year == 2014),
  spydata,
  3
)

allspydata2010 <- getSpyAndGameDataWithinWorldcup(
  filter(cleaned_worldcupmatches, Year == 2010),
  spydata,
  3
)

```

## Normalizing the average price

```{r}
# Min-max scale `average` within each World Cup so price movement can be
# compared across the two tournaments on a common 0-1 scale.

# 2014
min.2014 <- min(allspydata2014$average)
max.2014 <- max(allspydata2014$average)
difference.2014 <- max.2014 - min.2014
normalized2014average <- (allspydata2014$average - min.2014) / difference.2014
allspydata2014 <- add_column(allspydata2014, "normalized.average" = normalized2014average)

# 2010
min.2010 <- min(allspydata2010$average)
max.2010 <- max(allspydata2010$average)
difference.2010 <- max.2010 - min.2010
normalized2010average <- (allspydata2010$average - min.2010) / difference.2010
allspydata2010 <- add_column(allspydata2010, "normalized.average" = normalized2010average)

# Stack both tournaments (2014 rows first).
allspydata <- union_all(allspydata2014, allspydata2010)

allspydata

```

## 

# Exploring the Relationship of Time and Volume

Let us explore the correlation between the time from the game and the price of the stock.

## Correlations

```{r}
# Pairwise scatter plots and correlations for the four variables of interest.
ggpairs(
  allspydata,
  columns = c("time.from.game", "average", "volume", "normalized.average")
)
```

```{r}
# Correlation table for the four variables of interest. Columns are selected by
# name so the table does not silently change if the column order of
# `allspydata` changes.
correlation_columns <- c("volume", "average", "time.from.game", "normalized.average")
kable(cor(allspydata[, correlation_columns]))
```

## Linear Regression

```{r}
# Normalized price against seconds from kickoff.
ggplot(allspydata) +
  geom_point(aes(time.from.game, normalized.average))
```

```{r}
# Log volume against seconds from kickoff.
ggplot(allspydata) +
  geom_point(aes(time.from.game, log(volume)))
```

```{r}
# Degree-4 polynomial fit of log volume on time from kickoff.
polymodel <- lm(
  log(volume) ~ poly(time.from.game, 4),
  data = allspydata
)
polymodel
```

```{r}
# Log volume against seconds from kickoff, with ggplot2's default smoother.
ggplot(allspydata, aes(time.from.game, log(volume))) +
  geom_point() +
  geom_smooth()

```

# Sample Games from a World Cup

```{r}
# Fix the RNG seed so the same games are drawn on every run, then sample
# `samplesize` of the 2014 games.
set.seed(100)
samplesize <- 10
worldcupmatches2014 <- filter(cleaned_worldcupmatches, Year == 2014)
sampleworldcupgames <- sample_n(worldcupmatches2014, size = samplesize)
```

After

```{r}
# Log volume around kickoff for sampled game 1; the line marks kickoff.
game1 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 1, 3)
volumesp1 <- ggplot(game1) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[1]) +
  ggtitle("Volume over Time, Sample Game 1")
volumesp1
```

```{r}
# Log volume around kickoff for sampled game 2; the line marks kickoff.
game2 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 2, 3)
volumesp2 <- ggplot(game2) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[2]) +
  ggtitle("Volume over Time, Sample Game 2")
volumesp2
# Error again: we don't have full data for this game.
```

```{r}
# Log volume around kickoff for sampled game 3; the line marks kickoff.
game3 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 3, 3)
volumesp3 <- ggplot(game3) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[3]) +
  ggtitle("Volume over Time, Sample Game 3")
volumesp3
```

```{r}
# Log volume around kickoff for sampled game 4; the line marks kickoff.
game4 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 4, 3)
volumesp4 <- ggplot(game4) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[4]) +
  ggtitle("Volume over Time, Sample Game 4")
volumesp4
```

```{r}
# Log volume around kickoff for sampled game 5; the line marks kickoff.
game5 <- getSpyAndGameDataForOneGame(spydata, sampleworldcupgames, 5, 3)
volumesp5 <- ggplot(game5) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[5]) +
  ggtitle("Volume over Time, Sample Game 5")
volumesp5
```

## Price Scatter Plots

Using average price

```{r}
# Average price around kickoff for sampled game 1; the line marks kickoff.
pricesp1 <- ggplot(game1) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[1]) +
  ggtitle("Price over Time, Sample Game 1")
pricesp1
```

```{r}
# Average price around kickoff for sampled game 2; the line marks kickoff.
pricesp2 <- ggplot(game2) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[2]) +
  ggtitle("Price over Time, Sample Game 2")
pricesp2
# Issue: we don't have data for 16:00, and that is the time of the game.
```

```{r}
# Average price around kickoff for sampled game 3; the line marks kickoff.
pricesp3 <- ggplot(game3) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[3]) +
  ggtitle("Price over Time, Sample Game 3")
pricesp3
```

```{r}
# Average price around kickoff for sampled game 4; the line marks kickoff.
pricesp4 <- ggplot(game4) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[4]) +
  ggtitle("Price over Time, Sample Game 4")
pricesp4
```

```{r}
# Price points and volume bars for sampled game 5 on one shared y scale.
# NOTE(review): the secondary axis is labelled "average" but shows the primary
# axis divided by 100 - confirm this is the intended dual-axis scaling.
pricesp5 <- ggplot(game5, aes(date)) +
  geom_point(aes(y = average), color = "red") +
  geom_vline(xintercept = sampleworldcupgames$Datetime[5]) +
  ggtitle("Price over Time, Sample Game 5") +
  geom_col(aes(y = (volume))) +
  scale_y_continuous(sec.axis = sec_axis(~./100, name = "average"))
pricesp5
```

## Just USA Games

```{r}
# Games in which the USA is listed as home or away team; the away games are
# stacked first, which fixes the game numbering used in the plots that follow.
worldcupmatchesUSAhome <- filter(cleaned_worldcupmatches, Home.Team.Name == "USA")
worldcupmatchesUSAaway <- filter(cleaned_worldcupmatches, Away.Team.Name == "USA")
worldcupmatchesUSA <- union_all(worldcupmatchesUSAaway, worldcupmatchesUSAhome)
worldcupmatchesUSA
```

```{r}
# Log volume around kickoff for USA game 1; the line marks kickoff.
game1USA <- getSpyAndGameDataForOneGame(spydata, worldcupmatchesUSA, 1, 3)
volumesp1USA <- ggplot(game1USA) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[1]) +
  ggtitle("Volume over Time, USA Game 1, 2010")
volumesp1USA
```

```{r}
# Log volume around kickoff for USA game 2; the line marks kickoff.
game2USA <- getSpyAndGameDataForOneGame(spydata, worldcupmatchesUSA, 2, 3)
volumesp2USA <- ggplot(game2USA) +
  geom_point(mapping = aes(x = date, y = log(volume)), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[2]) +
  ggtitle("Volume over Time, USA Game 2, 2014")
volumesp2USA
```

```{r}
# Raw volume around kickoff for USA game 3; the line marks kickoff.
game3USA <- getSpyAndGameDataForOneGame(spydata, worldcupmatchesUSA, 3, 3)
volumesp3USA <- ggplot(game3USA) +
  geom_point(mapping = aes(x = date, y = volume), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[3]) +
  ggtitle("Volume over Time, USA Game 2, 2010")
volumesp3USA
```

```{r}
# Raw volume around kickoff for USA game 4; the line marks kickoff.
game4USA <- getSpyAndGameDataForOneGame(spydata, worldcupmatchesUSA, 4, 3)
volumesp4USA <- ggplot(game4USA) +
  geom_point(mapping = aes(x = date, y = volume), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[4]) +
  ggtitle("Volume over Time, USA Game 1, 2014")
volumesp4USA
```

```{r}
# Average price around kickoff for USA game 4; the line marks kickoff.
pricesp4USA <- ggplot(game4USA) +
  geom_point(mapping = aes(x = date, y = average), color = "red") +
  geom_vline(xintercept = worldcupmatchesUSA$Datetime[4]) +
  ggtitle("Price over Time, USA Game 1, 2014")
pricesp4USA
```

# What time should you look to sell?

```{r}
# For every cleaned game, find the SPY row with the highest `average` within
# 3 hours of kickoff and record that price and its `time.from.game`.
price_list = c()
time_list = c()
# seq_len() skips the loop entirely when there are no games (1:0 would not).
for (game in seq_len(nrow(cleaned_worldcupmatches))) {
  price_game_data <- getSpyAndGameDataForOneGame(spydata, cleaned_worldcupmatches, game, 3)
  highestdatapoint = price_game_data[which.max(price_game_data$average),]
  highestpriceforgame <- highestdatapoint$average
  # Named `time_of_high` so the base function `time` is not shadowed.
  time_of_high <- highestdatapoint[["time.from.game"]]
  price_list = append(price_list, highestpriceforgame)
  time_list = append(time_list, time_of_high)
}
```

```{r}
# One row per game: the highest price and when it occurred relative to kickoff.
optimal_selling.df <- data.frame(
  "price" = price_list,
  "time" = time_list
)
optimal_selling.df
```

Do this for the optimal buying time?

```{r}
# Histogram of when (relative to kickoff) each game's highest price occurred.
ggplot(optimal_selling.df) +
  geom_histogram(mapping = aes(x = time)) +
  ggtitle("How Often Best Selling Time is vs. Time From Game")
```

# What time should you look to buy?

```{r}
# For every cleaned game, find the SPY row with the lowest `average` within
# 3 hours of kickoff and record that price and its `time.from.game`.
price_list = c()
time_list = c()
# seq_len() skips the loop entirely when there are no games (1:0 would not).
for (game in seq_len(nrow(cleaned_worldcupmatches))) {
  price_game_data <- getSpyAndGameDataForOneGame(spydata, cleaned_worldcupmatches, game, 3)
  lowestdatapoint = price_game_data[which.min(price_game_data$average),]
  lowestpriceforgame <- lowestdatapoint$average
  # Named `time_of_low` so the base function `time` is not shadowed.
  time_of_low <- lowestdatapoint[["time.from.game"]]
  price_list = append(price_list, lowestpriceforgame)
  time_list = append(time_list, time_of_low)
}
```

```{r}
# One row per game: the lowest price and when it occurred relative to kickoff,
# shown as a histogram of those times.
optimal_buying.df <- data.frame(
  "price" = price_list,
  "time" = time_list
)
ggplot(optimal_buying.df) +
  geom_histogram(mapping = aes(x = time)) +
  ggtitle("How Often Best Buying Time is vs. Time From Game")
```
